*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	This program produces a file containing pscores for all match years
*	between 2012 and 2019 (enrollment years 2013-2020).
*	----------------------------------------------------------------------------

	//never pause output for --more--, then wipe data and other objects held in memory
	set more off
	clear all

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

***	construct pscores

	forval year = 2012/2019 {

	//one pass per match year: load the cleaned match file for that year,
	//compute formula and frequency pscores for every row, and save
	//pscores_YYYY.dta in the cleandata folder for the build step below
	use "${cleandata}match_`year'.dta", clear

*	preliminaries

	//rescaling lottery numbers
	gen double lotterynum = tiebreaker / 1000000

	//marginal priorities (last priority given offers)
	//NOTE(review): assumes offer is a 0/1 indicator, so priority * offer is 0
	//for rows without an offer -- confirm
	bys prg: egen marginal_priority = max(priority * offer)
	gen marginal = priority == marginal_priority

	//cutoffs (last lottery number given offer in marginal priority)
	bys prg: egen double lottery_cutoff = max(lotterynum * offer * marginal)
	//this cutoff var is only non-zero when relevant
	gen double effective_cutoff = marginal * lottery_cutoff

*	Thetas

	//position of each row relative to the marginal priority of its program:
	//Theta_a = strictly below it, Theta_c = exactly at it, Theta_n = strictly above it
	gen Theta_a = priority < marginal_priority
	gen Theta_c = priority == marginal_priority
	gen Theta_n = priority > marginal_priority

*	characterizing better set risk (for MID calculation)
	//(the better set for student i and school k is the set of schools i prefers to k)

	//every running flag below reads the previous row within stu, so the
	//stu choice sort order is what defines the better set
	sort stu choice

	//ever Theta_a in your better set
	gen ever_Theta_a = 0
	by stu: replace ever_Theta_a = max(ever_Theta_a[_n-1], Theta_a[_n-1]) if _n > 1

	//always Theta_n in your better set
	gen always_Theta_n = 1
	by stu: replace always_Theta_n = min(always_Theta_n[_n-1], Theta_n[_n-1]) if _n > 1

	//ever Theta_c in your better set
	gen ever_marginal = 0
	by stu: replace ever_marginal = max(ever_marginal[_n-1], Theta_c[_n-1]) if _n > 1

	//never Theta_a in your better set
	gen either_Theta_cn = max(Theta_c,Theta_n)
	gen always_Theta_cn = 1
	by stu: replace always_Theta_cn = min(always_Theta_cn[_n-1], either_Theta_cn[ _n-1 ] ) if _n > 1

	//never Theta_a, but Theta_c at least once in your better set
	gen betterset_risk = always_Theta_cn & ever_marginal

*	MID (Most Informative Disqualification)

	sort stu choice

	//stored as double, like the cutoffs it is filled from; a default float
	//would round the cutoff before lottery_cutoff - mid is taken below
	gen double mid = .

	//MID=0 if always Theta_n in better set
	replace mid = 0 if always_Theta_n == 1

	//MID=1 if ever Theta_a in better set
	replace mid = 1 if ever_Theta_a == 1

	//MID in [0,1] if have non-degenerate better set risk:
	//running max of the effective cutoffs of the rows above within stu
	by stu: replace mid = max(mid[_n - 1], effective_cutoff[_n-1]) if betterset_risk == 1

	//check: the three cases above must cover every row
	assert !missing(mid)

*	formula pscores

	gen double pscore_form = .
	//zero when this row is Theta_n, or when a better-set row is Theta_a
	replace pscore_form = 0 if Theta_n == 1 | ever_Theta_a == 1
	//Theta_a rows: one minus MID, floored at zero
	replace pscore_form = max(0,1-mid) if Theta_a == 1 & ever_Theta_a == 0
	//Theta_c rows: program cutoff minus MID, floored at zero
	replace pscore_form = max(0,lottery_cutoff-mid) if Theta_c == 1 & ever_Theta_a == 0

	//check: every row received a formula pscore
	assert !missing(pscore_form)

*	frequency pscores

	//empirical offer rate within cells of program, Theta class and mid
	bys prg Theta_? mid: egen double pscore_freq = mean(offer)

	//for kids in Theta_n mid is irrelevant
	bys prg Theta_n: egen double pscore_freq_n = mean(offer) if Theta_n == 1
	replace pscore_freq = pscore_freq_n if Theta_n == 1

	//check: every row received a frequency pscore
	assert !missing(pscore_freq)

**	save
	keep stu choice prg sch schname priority marginal Theta_? ///
		pscore_form pscore_freq grade offer

	//tag rows with the match year before writing the yearly file
	gen year_app = `year'
	save "${cleandata}pscores_`year'.dta", replace

	}

***	build

*	append pscores

	//start from a single placeholder row, stack the yearly pscore files onto it,
	//then remove the placeholder; each append also adds a source marker
	//variable named append_YYYY
	clear
	set obs 1
	forvalues yr = 2012/2019 {
		append using "${cleandata}pscores_`yr'.dta", generate(append_`yr')
	}
	drop in 1

*	sample restrictions

	//keep only rows whose grade is not one of these four string codes
	//(presumably early-childhood and kindergarten grades -- confirm)
	keep if !inlist(grade,"EC","ece3","ece4","k")

	//remove leading zeros by round-tripping grade through numeric
	//NOTE(review): destring leaves grade untouched if any nonnumeric value
	//remains, in which case leading zeros are silently kept
	destring grade, replace
	tostring grade, replace

*	sectors

	//the merge key is schoolnum, so sch is renamed for the merge and restored
	//afterwards; unmatched master rows are kept, using-only rows are not
	rename sch schoolnum
	merge m:1 schoolnum grade ///
		using "${rawdata}schools/dps_school_classification_all_years.dta", ///
		keep(master match) generate(all_school_merge)
	rename schoolnum sch

	//numeric copy of the string grade
	destring grade, generate(app_grade)

	//manual sector flags for three schools in application year 2014

	//STRIVE Prep Elementary Kinder Full Day
	foreach flag in strive cmo charter {
		replace `flag' = 1 if year_app == 2014 & sch == 171
	}

	//West HS
	replace traditional = 1 if year_app == 2014 & sch == 458

	//Sims Fayola HS
	replace charter = 1 if year_app == 2014 & sch == 512

*	aggregate pscores (to school level)

	//collapse to one row per stu-sch-grade-year, keeping only the vars we need:
	//pscores and offers are summed over the duplicate rows (different prg within
	//the same school); school descriptors take the first nonmissing value, which
	//depends on the sort order, hence the sort on choice
	//NOTE(review): presumably the lowest choice value is the most preferred -- confirm
	//year_app is spelled out in full: the short form year only resolved through
	//variable abbreviation, which fails under set varabbrev off or once any
	//other variable starting with year is present
	sort stu sch app_grade year_app choice
	collapse (sum) pscore_freq pscore_form offer (firstnm) schname ///
		charter innovation alternative contract magnet traditional ///
		non_cmo cmo kipp dsst strive, by(stu sch app_grade year_app)

	//censor pscores at 1 (sums over several programs can exceed it);
	//missing values compare greater than any number, so they are excluded
	//explicitly instead of being turned into 1
	foreach pscore of varlist pscore* {
		replace `pscore' = 1 if `pscore' > 1 & !missing(`pscore')
	}

*	clean up

	//one kid getting two offers: a summed offer of 2 is dropped
	drop if offer == 2

	//check that students only get one offer to each school
	summarize offer
	assert r(max) == 1

	//prefix application-side vars with app_ (to differentiate from enrollment vars)
	local appvars sch schname charter innovation alternative contract magnet ///
		traditional non_cmo cmo kipp dsst strive
	foreach v of local appvars {
		rename `v' app_`v'
	}

	//for merging later: year following the application year
	gen year = year_app + 1

	//var labels
	label variable year_app  "Spring calendar year of application"
	label variable app_grade "Grade applying to"
	label variable app_sch   "Code of school applying to"

***	save

	//shrink storage types where possible without losing information, silently
	quietly compress

	//write the stacked school-level pscore file, overwriting any earlier copy
	save "${cleandata}pscores", replace
